library(tidyverse)
library(phyloseq)
library(speedyseq)
library(ggrepel)
library(ampvis2)
library(plotly)
library(microbiome)
library(here)
options(getClass.msg=FALSE) # https://github.com/epurdom/clusterExperiment/issues/66
#this fixes an error message that pops up because the class 'Annotated' is defined in two different packages
'%!in%' <- function(x,y)!('%in%'(x,y))
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_taxa_tests.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_normalisation.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_alpha.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_beta.R")
source("https://raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_heatmap.R")
plot_time <- function(df,
measure,
x = "Day_from_Inoculum",
y = "value",
shape = "neg",
fill = "Reactor_Treatment",
group = "Reactor_Treatment",
point_size=0.5,
facet,
smooth = FALSE)
{
df %>%
dplyr::filter(alphadiversiy %in% measure) %>%
dplyr::mutate(alphadiversiy = fct_reorder(alphadiversiy, value, .desc = TRUE)) %>%
dplyr::mutate(neg = ifelse(value == 0, "neg", "pos")) %>%
arrange(Day_from_Inoculum) %>%
ggplot(aes_string(x = x,
y = y)) +
geom_jitter(alpha=0.9, size = point_size, aes_string(color = fill, fill = fill, shape = shape), show.legend = TRUE) +
geom_path(inherit.aes = TRUE, aes_string(fill = fill, color = fill, show.legend = FALSE),
size = 0.01,
linetype = "dashed") +
facet_grid(as.formula(facet), scales = "free") +
geom_vline(xintercept = c(0),
color="black", alpha=0.4) + theme_light() -> plot
if(smooth == TRUE)
{
plot +
geom_smooth(show.legend = FALSE, level = 0.95, alpha=0.005, size = 0.5 ,aes_string(color = fill, fill = fill)) -> plot
}
# scale_y_continuous(labels = scientific,
# limits=c(1e+10, 1e+11), breaks = seq(1e+10, 1e+11, by = 1e+10),
# trans = "log10") +
return(plot + theme(legend.position = "bottom"))
}
ps = "data/raw/metabarcoding/ps_silva_dada2_human_chicken_meta.RDS"
ps %>%
here::here() %>%
readRDS() %>%
phyloseq_get_strains_fast() %>%
filter_taxa(function(x) sum(x > 0) > 0, TRUE) %>%
prune_samples(sample_sums(.)>= 100, .) %>% # only keep samples with at least 100 seqs.
subset_samples(Reactor %in% c("STRAIN", "negative", "positive")) %>%
# subset_samples(Experiment == "CCUG59168" | Experiment == "HV292.1" | Experiment == "Cecum" | Experiment == "Continuous" & Reactor %in% c("IR1", "CR")) %>%
# rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
phyloseq_ampvis_heatmap(transform = "compositional",
group_by = "SampleID",
facet_by = c("Fermentation", "Enrichment", "Reactor", "Reactor_Treatment"),
tax_aggregate = "Species",
tax_add = NULL,
ntax = 50) -> p
## Joining, by = "ASV"
p$data %>%
mutate(Abundance = na_if(Abundance, 0)) -> p$data
p + facet_grid( ~ Reactor , scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 1, 10, 50, 75, 100),
labels = c(0, 0.01, 1, 10, 50, 75, 100),
trans = scales::pseudo_log_trans(sigma = 0.001),
na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2
# p2 %>%
# export::graph2ppt(append = TRUE,
# file = file.path(here::here("data/processed/figures_NRP72")))
ps = "data/raw/metabarcoding/ps_silva_dada2_human_chicken_meta.RDS"
ps %>%
here::here() %>%
readRDS() %>%
phyloseq_get_strains_fast() %>%
filter_taxa(function(x) sum(x > 0) > 0, TRUE) %>%
subset_samples(Enrichment == "NotEnriched") -> physeq
## Joining, by = "ASV"
physeq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1110 taxa and 414 samples ]:
## sample_data() Sample Data: [ 414 samples by 59 sample variables ]:
## tax_table() Taxonomy Table: [ 1110 taxa by 8 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 1110 tips and 1109 internal nodes ]:
## refseq() DNAStringSet : [ 1110 reference sequences ]
## taxa are rows
physeq %>%
sample_data() %>%
data.frame() %>%
DT::datatable()
physeq %>%
rarefy_even_depth(sample.size = 4576,
rngseed = 123) -> physeq_rare
## `set.seed(123)` was used to initialize repeatable random subsampling.
## Please record this for your records so others can reproduce.
## Try `set.seed(123); .Random.seed` for the full vector
## ...
## 73 samples removedbecause they contained fewer reads than `sample.size`.
## Up to first five removed samples are:
## CR-40-S166CR-52-S196EMPTY-S384negativ-S80negative2-S181
## ...
## 351OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
physeq_rare
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 759 taxa and 341 samples ]:
## sample_data() Sample Data: [ 341 samples by 59 sample variables ]:
## tax_table() Taxonomy Table: [ 759 taxa by 8 taxonomic ranks ]:
## phy_tree() Phylogenetic Tree: [ 759 tips and 758 internal nodes ]:
## refseq() DNAStringSet : [ 759 reference sequences ]
## taxa are rows
heatmap:
physeq_rare %>%
subset_samples(Treatment == "STRAIN" | Model == "Human" & Reactor %!in% c("negative", "positive")) %>%
# subset_samples(Experiment == "CCUG59168" | Experiment == "HV292.1" | Experiment == "Cecum" | Experiment == "Continuous" & Reactor %in% c("IR1", "CR")) %>%
# rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
phyloseq_ampvis_heatmap(transform = "compositional",
group_by = "SampleID",
facet_by = c("Fermentation", "Enrichment", "Reactor_Treatment"),
tax_aggregate = "Species",
tax_add = NULL,
ntax = 100) -> p
p$data %>%
mutate(Abundance = na_if(Abundance, 0)) -> p$data
p + facet_grid( ~ Fermentation + Reactor_Treatment , scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
labels = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
trans = scales::pseudo_log_trans(sigma = 0.001),
na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2
# p2 %>%
# export::graph2ppt(append = TRUE,
# file = file.path(here::here("data/processed/figures_NRP72")))
same at the genus level
physeq_rare %>%
subset_samples(Treatment == "STRAIN" | Model == "Human" & Reactor %!in% c("negative", "positive")) %>%
# subset_samples(Experiment == "CCUG59168" | Experiment == "HV292.1" | Experiment == "Cecum" | Experiment == "Continuous" & Reactor %in% c("IR1", "CR")) %>%
# rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
phyloseq_ampvis_heatmap(transform = "compositional",
group_by = "SampleID",
facet_by = c("Fermentation", "Enrichment", "Reactor_Treatment"),
tax_aggregate = "Genus",
tax_add = NULL,
ntax = 50) -> p
p$data %>%
mutate(Abundance = na_if(Abundance, 0)) -> p$data
p + facet_grid( ~ Fermentation + Reactor_Treatment , scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
labels = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
trans = scales::pseudo_log_trans(sigma = 0.001),
na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2
# p2 %>%
# export::graph2ppt(append = TRUE,
# file = file.path(here::here("data/processed/figures_NRP72")))
same at the genus level
physeq_rare %>%
subset_samples(Treatment == "STRAIN" | Experiment == "Continuous") %>%
# subset_samples(Experiment == "CCUG59168" | Experiment == "HV292.1" | Experiment == "Cecum" | Experiment == "Continuous" & Reactor %in% c("IR1", "CR")) %>%
# rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
phyloseq_ampvis_heatmap(transform = "compositional",
group_by = "SampleID",
facet_by = c("Model", "Fermentation", "Reactor_Treatment"),
tax_aggregate = "Genus",
tax_add = NULL,
ntax = 50) -> p
p$data %>%
mutate(Abundance = na_if(Abundance, 0)) -> p$data
p + facet_grid( ~Model + Fermentation + Reactor_Treatment , scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
labels = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
trans = scales::pseudo_log_trans(sigma = 0.001),
na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2
# p2 %>%
# export::graph2ppt(append = TRUE,
# file = file.path(here::here("data/processed/figures_NRP72")))
physeq_rare %>%
subset_samples(Treatment == "STRAIN" | Experiment == "Continuous") %>%
# subset_samples(Experiment == "CCUG59168" | Experiment == "HV292.1" | Experiment == "Cecum" | Experiment == "Continuous" & Reactor %in% c("IR1", "CR")) %>%
# rarefy_even_depth(sample.size = 2574,rngseed = 123) %>%
phyloseq_ampvis_heatmap(transform = "compositional",
group_by = "SampleID",
facet_by = c("Model", "Fermentation", "Reactor_Treatment"),
tax_aggregate = "Species",
tax_add = NULL,
ntax = 200) -> p
p$data %>%
mutate(Abundance = na_if(Abundance, 0)) -> p$data
p + facet_grid( ~Model + Fermentation + Reactor_Treatment , scales = "free", space = "free") +
scale_fill_viridis_c(breaks = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
labels = c(0, 0.01, 0.1, 0.2, 0.50, 0.75, 0.100),
trans = scales::pseudo_log_trans(sigma = 0.001),
na.value = 'transparent') -> p2
## Scale for 'fill' is already present. Adding another scale for 'fill', which
## will replace the existing scale.
p2
# p2 %>%
# export::graph2ppt(append = TRUE,
# file = file.path(here::here("data/processed/figures_NRP72")))
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] reshape2_1.4.4 scales_1.1.1 here_1.0.1
## [4] microbiome_1.10.0 plotly_4.9.2.1 ampvis2_2.6.5
## [7] ggrepel_0.8.2 speedyseq_0.5.0 phyloseq_1.34.0
## [10] forcats_0.5.0 stringr_1.4.0 dplyr_1.0.4
## [13] purrr_0.3.4 readr_1.4.0 tidyr_1.1.2
## [16] tibble_3.0.6 ggplot2_3.3.3 tidyverse_1.3.0.9000
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_2.0-0 ellipsis_0.3.1
## [4] rprojroot_2.0.2 XVector_0.28.0 fs_1.5.0
## [7] rstudioapi_0.13 farver_2.0.3 ggnet_0.1.0
## [10] DT_0.15 lubridate_1.7.9 xml2_1.3.2
## [13] codetools_0.2-16 splines_4.0.2 doParallel_1.0.16
## [16] knitr_1.31 ade4_1.7-16 jsonlite_1.7.2
## [19] broom_0.7.2 cluster_2.1.0 dbplyr_1.4.4
## [22] compiler_4.0.2 httr_1.4.2 backports_1.2.1
## [25] assertthat_0.2.1 Matrix_1.2-18 lazyeval_0.2.2
## [28] cli_2.3.0 htmltools_0.5.1.1 prettyunits_1.1.1
## [31] tools_4.0.2 igraph_1.2.6 gtable_0.3.0
## [34] glue_1.4.2 Rcpp_1.0.6 Biobase_2.50.0
## [37] cellranger_1.1.0 vctrs_0.3.6 Biostrings_2.56.0
## [40] multtest_2.44.0 ape_5.4-1 nlme_3.1-149
## [43] iterators_1.0.13 crosstalk_1.1.0.1 xfun_0.21
## [46] network_1.16.0 rvest_0.3.6 lifecycle_1.0.0
## [49] zlibbioc_1.34.0 MASS_7.3-52 hms_1.0.0
## [52] parallel_4.0.2 biomformat_1.7.0 rhdf5_2.32.4
## [55] RColorBrewer_1.1-2 yaml_2.2.1 stringi_1.5.3
## [58] highr_0.8 S4Vectors_0.26.1 foreach_1.5.1
## [61] permute_0.9-5 BiocGenerics_0.34.0 rlang_0.4.10
## [64] pkgconfig_2.0.3 evaluate_0.14 lattice_0.20-41
## [67] Rhdf5lib_1.10.1 patchwork_1.1.0 htmlwidgets_1.5.3
## [70] tidyselect_1.1.0 plyr_1.8.6 magrittr_2.0.1
## [73] R6_2.5.0 IRanges_2.22.2 generics_0.1.0
## [76] DBI_1.1.1 pillar_1.4.7 haven_2.3.1
## [79] withr_2.4.1 mgcv_1.8-32 survival_3.2-3
## [82] modelr_0.1.8 crayon_1.4.1 rmarkdown_2.4
## [85] progress_1.2.2 grid_4.0.2 readxl_1.3.1
## [88] data.table_1.13.6 blob_1.2.1 vegan_2.5-7
## [91] reprex_0.3.0 digest_0.6.27 stats4_4.0.2
## [94] munsell_0.5.0 viridisLite_0.3.0